﻿#-*- coding: utf-8 -*-

import urllib.request
import re

# Category page that lists the blog's Python posts, and the pattern used to
# pull each post out of it: group 1 is the whole <a ...>...</a> element,
# group 2 is its link text (which must begin with "Python" or "python").
pythontagurl = "http://www.cnblogs.com/itech/category/170012.html"
pythonarticleurlregrex = r'(<a.*?href="http://www.cnblogs.com/itech/archive.*?>([Pp]ython.*?)</a>)'

# get the page content string which contains all python article links
# The response is used as a context manager so the connection is closed even
# if reading fails part-way through.
decodedlines = []
with urllib.request.urlopen(pythontagurl) as pythontagpage:
    for line in pythontagpage:
        try:
            decodedlines.append(line.decode('utf-8', 'strict'))
        except UnicodeDecodeError:
            # Best effort: a line that is not valid UTF-8 is dropped rather
            # than aborting the whole run. Any other error is left to surface.
            continue
# Join once at the end instead of growing the string line by line.
pythontagstr = "".join(decodedlines)

# Extract every (anchor markup, title) pair from the page, then sort the
# titles into per-category buckets.
pythonlinkandtiles = re.findall(pythonarticleurlregrex, pythontagstr)

# title -> full anchor markup; when a title occurs more than once the last
# anchor found on the page is the one that is kept.
d = {title: link for link, title in pythonlinkandtiles}
pythontitles = list(d)

# Title prefixes that decide which bucket an article lands in.
bstr1 = "python基础"
bstr2 = "python语法"
estr = "python实例"
lstr = "python类库"
tstr = "python技巧"
ostr = "python其他"

basic, examples, libs, tips, others = [], [], [], [], []
for heading in pythontitles:
    # The prefix test is case-sensitive, so a title that starts with a
    # capital "Python" matches none of the prefixes and falls into `others`.
    if heading.startswith((bstr1, bstr2)):
        bucket = basic
    elif heading.startswith(estr):
        bucket = examples
    elif heading.startswith(lstr):
        bucket = libs
    elif heading.startswith(tstr):
        bucket = tips
    else:
        bucket = others
    bucket.append(heading)

# Each bucket is ordered alphabetically by title, in place.
for bucket in (basic, libs, examples, tips, others):
    bucket.sort()

# Flatten the buckets into one list of HTML fragments: a red heading for each
# category followed by the anchor markup of every article in that category.
pythonarticles = []
fonts = "<br/><font color=red size = 5>"
fonte = ":</font>"
sections = (
    (bstr1, basic),
    (lstr, libs),
    (estr, examples),
    (tstr, tips),
    (ostr, others),
)
for label, titles in sections:
    pythonarticles.append(fonts + label + fonte)
    pythonarticles.extend(d[title] for title in titles)

# generate pythonindex.html
# pythonarticles mixes section headings with article links, so its length
# over-counts; d holds exactly one entry per article and gives the real total.
articlecount = len(d)

# The with-block guarantees the file is flushed and closed even if a write
# fails half-way through.
with open("pythonindex.html", "w", encoding='utf-8') as pythonindex:
    pythonindex.write("<html>")
    pythonindex.write("<head>")
    pythonindex.write("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>")
    pythonindex.write("<title>Python - iTech's Blog</title>")
    pythonindex.write("</head>")
    pythonindex.write("<body>")
    # "<br/>" replaces the original "</br>", which is not a valid tag.
    pythonindex.write("Total number is :" + str(articlecount) + "<br/>")
    for pa in pythonarticles:
        pythonindex.write(pa)
        pythonindex.write("<br/>")
    pythonindex.write("</body>")
    pythonindex.write("</html>")